R Markdown

Exploratory Data Analysis for the Crime Forecasting Challenge

Import Data

# Load the raw, semicolon-separated export; keep text columns as character.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
d <- read.csv(
  file = "data/mp_data.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)

# Work on a copy so the raw import `d` stays untouched.
data <- d

# Parse occ_date from day/month/year text into POSIXct.
# Values that do not match the format become NA.
data$occ_date <- as.POSIXct(data$occ_date, format = "%d/%m/%Y")

# Drop every row that has an NA in any column (including unparsed dates).
data <- na.omit(data)

Brief Data Description

## 'data.frame':    822300 obs. of  10 variables:
##  $ CATEGORY       : chr  "STREET CRIMES" "STREET CRIMES" "STREET CRIMES" "STREET CRIMES" ...
##  $ CALL.GROUPS    : chr  "DISORDER" "DISORDER" "DISORDER" "DISORDER" ...
##  $ final_case_type: chr  "DISTP " "DISTP " "DISTP " "DISTP " ...
##  $ CASE.DESC      : chr  "DISTURBANCE - PRIORITY                            " "DISTURBANCE - PRIORITY                            " "DISTURBANCE - PRIORITY                            " "DISTURBANCE - PRIORITY                            " ...
##  $ occ_date       : POSIXct, format: "2012-03-01" "2012-03-01" ...
##  $ x_coordinate   : int  7641076 7642640 7643599 7644359 7644771 7650214 7653737 7666126 7673214 7679775 ...
##  $ y_coordinate   : int  684831 683167 683216 693642 683859 692359 698495 671764 671625 678272 ...
##  $ census_tract   : int  4900 10600 10600 3502 10600 2401 3200 702 8302 9201 ...
##  $ DISTRICT       : int  810 842 842 590 842 660 620 922 971 952 ...
##  $ PRECINCT       : chr  "CE" "CE" "CE" "NO" ...
##  - attr(*, "na.action")=Class 'omit'  Named int [1:1947] 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 ...
##   .. ..- attr(*, "names")= chr [1:1947] "5715" "5716" "5717" "5718" ...
##        CATEGORY CALL.GROUPS final_case_type
## 1 STREET CRIMES    DISORDER          DISTP 
## 2 STREET CRIMES    DISORDER          DISTP 
## 3 STREET CRIMES    DISORDER          DISTP 
## 4 STREET CRIMES    DISORDER          DISTP 
## 5 STREET CRIMES    DISORDER          DISTP 
## 6 STREET CRIMES    DISORDER          DISTP 
##                                            CASE.DESC   occ_date
## 1 DISTURBANCE - PRIORITY                             2012-03-01
## 2 DISTURBANCE - PRIORITY                             2012-03-01
## 3 DISTURBANCE - PRIORITY                             2012-03-01
## 4 DISTURBANCE - PRIORITY                             2012-03-01
## 5 DISTURBANCE - PRIORITY                             2012-03-01
## 6 DISTURBANCE - PRIORITY                             2012-03-01
##   x_coordinate y_coordinate census_tract DISTRICT PRECINCT
## 1      7641076       684831         4900      810       CE
## 2      7642640       683167        10600      842       CE
## 3      7643599       683216        10600      842       CE
## 4      7644359       693642         3502      590       NO
## 5      7644771       683859        10600      842       CE
## 6      7650214       692359         2401      660       NO
## [1]    100   2303   4900   8202 980000
##    CATEGORY         CALL.GROUPS        final_case_type   
##  Length:822300      Length:822300      Length:822300     
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##   CASE.DESC            occ_date                    x_coordinate    
##  Length:822300      Min.   :2012-03-01 00:00:00   Min.   :7604367  
##  Class :character   1st Qu.:2013-06-23 00:00:00   1st Qu.:7644078  
##  Mode  :character   Median :2014-08-30 00:00:00   Median :7655131  
##                     Mean   :2014-08-01 19:47:47   Mean   :7657913  
##                     3rd Qu.:2015-09-20 00:00:00   3rd Qu.:7671669  
##                     Max.   :2016-09-30 00:00:00   Max.   :7711207  
##   y_coordinate     census_tract       DISTRICT       PRECINCT        
##  Min.   :642230   Min.   :   100   Min.   :510.0   Length:822300     
##  1st Qu.:676050   1st Qu.:  2303   1st Qu.:670.0   Class :character  
##  Median :683582   Median :  4900   Median :841.0   Mode  :character  
##  Mean   :684385   Mean   :  6323   Mean   :796.7                     
##  3rd Qu.:690687   3rd Qu.:  8202   3rd Qu.:932.0                     
##  Max.   :732085   Max.   :980000   Max.   :990.0
## [1] "STREET CRIMES"       "OTHER"               "MOTOR VEHICLE THEFT"
## [4] "BURGLARY"
## [1] "DISORDER"           "PERSON CRIME"       " SUSPICIOUS"       
## [4] "NON CRIMINAL/ADMIN" "PROPERTY CRIME"     "TRAFFIC"
##   [1] "DISTP " "DISTW " "VICE  " "ASSLTP" "ASSLTW" "ROBP  " "ROBW  "
##   [8] "SHOOTW" "SHOTS " "STABW " "THRETP" "THRETW" "GANG  " "AREACK"
##  [15] "PREMCK" "SUSP  " "SUSPP " "SUSPW " "ANIML " "ANIMLP" "BOMBTH"
##  [22] "CHEMTH" "DIST  " "ESCAPE" "FWB   " "FWH   " "FWI   " "FWN   "
##  [29] "NOISE " "CHEM  " "PARK  " "PARTY " "POLINV" "SCHL  " "SCHLP "
##  [36] "THRET " "TMET  " "TMETP " "TRASH " "TRASHP" "UNWNT " "TMETW "
##  [43] "UNWNTP" "UNWNTW" "W26   " "ASSIST" "77"     "CIVIL " "EVICT "
##  [50] "FOLLOW" "MSG   " "FLAG  " "PROP  " "RED   " "RIVPOL" "SEIZE "
##  [57] "SERVE " "STNDBY" "TRANS " "WARR  " "WARRC " "WELCK " "SUBSTP"
##  [64] "WELCKP" "ASSLT " "DEVICE" "ROB   " "SHOOT " "STAB  " "BURG  "
##  [71] "FRAUD " "FRAUDP" "THEFT " "IDENT " "THEFTC" "THEFTP" "VAND  "
##  [78] "VANDP " "VEHST " "ACCHR " "ACCHRP" "ACCINJ" "ACCNON" "ACCUNK"
##  [85] "DUII  " "HAZARD" "TRASTP" "WRONG " "FPURS " "TPURS " "VEHREC"
##  [92] "VEHSTP" "PROWLP" "BURGP " "SCHLW " "RSTLN " "ZERO  " "GREAT "
##  [99] "SCHLET" "HOSTGE"
##   [1] "DISTURBANCE - PRIORITY                            "
##   [2] "DISTURBANCE - WITH WEAPON *H                      "
##   [3] "VICE-DRUGS, LIQUOR, PROSTITUTION, GAMBLING        "
##   [4] "ASSAULT - PRIORITY                                "
##   [5] "ASSAULT - WITH WEAPON *H                          "
##   [6] "ROBBERY - PRIORITY *H                             "
##   [7] "ROBBERY - WITH WEAPON *H                          "
##   [8] "SHOOTING - WITH WEAPON *H                         "
##   [9] "SHOTS FIRED                                       "
##  [10] "STABBING - WITH WEAPON *H                         "
##  [11] "THREAT - PRIORITY                                 "
##  [12] "THREAT - WITH WEAPON *H                           "
##  [13] "GANG RELATED                                      "
##  [14] "AREA CHECK                                        "
##  [15] "PREMISE CHECK                                     "
##  [16] "SUSPICIOUS SUBJ, VEH, OR CIRCUMSTANCE             "
##  [17] "SUSPICIOUS - PRIORITY                             "
##  [18] "SUSPICIOUS - WITH WEAPON *H                       "
##  [19] "ANIMAL PROBLEM                                    "
##  [20] "ANIMAL PROBLEM - PRIORITY                         "
##  [21] "BOMB - THREAT (33B)                               "
##  [22] "CHEMICAL OR BIOLOGICAL THREAT (33CTH)             "
##  [23] "DISTURBANCE - COLD                                "
##  [24] "ESCAPE FROM CUSTODY                               "
##  [25] "FIREWORKS - NOISE (BROADCAST ONLY)                "
##  [26] "FIREWORKS - HAZARD                                "
##  [27] "FIREWORKS - ILLEGAL                               "
##  [28] "FIREWORKS - NOISE (MDC DISPATCH)                  "
##  [29] "NOISE DISTURBANCE                                 "
##  [30] "CHEMICAL OR BIOLOGICAL (33C)                      "
##  [31] "PARKING PROBLEM                                   "
##  [32] "PARTY DISTURBANCE                                 "
##  [33] "BOMB OR CHEM POLICE INVESTIGATION (33B/33C)       "
##  [34] "SCHOOL INCIDENT - COLD                            "
##  [35] "SCHOOL INCIDENT - PRIORITY                        "
##  [36] "THREAT - COLD                                     "
##  [37] "TRIMET INCIDENT - COLD                            "
##  [38] "TRIMET INCIDENT - PRIORITY                        "
##  [39] "ILLEGAL DUMPING - COLD                            "
##  [40] "ILLEGAL DUMPING - PRIORITY                        "
##  [41] "UNWANTED PERSON                                   "
##  [42] "TRIMET INCIDENT - WITH WEAPON *H                  "
##  [43] "UNWANTED PERSON - PRIORITY                        "
##  [44] "UNWANTED PERSON - WITH WEAPON *H                  "
##  [45] "DETOX TRANSPORT                                   "
##  [46] "ASSIST - CITIZEN OR AGENCY                        "
##  [47] "SUBJECT STOP - SDC                                "
##  [48] "CIVIL - CIVIL PROBLEM                             "
##  [49] "CIVIL - EVICTION                                  "
##  [50] "FOLLOW-UP                                         "
##  [51] "DELIVER MESSAGE                                   "
##  [52] "FLAGDOWN                                          "
##  [53] "PROPERTY LOST, FOUND, RECOVERED                   "
##  [54] "ASSISTANCE - FIRE / EMS NEED POLICE *H            "
##  [55] "RIVER - MARINE INCIDENT                           "
##  [56] "CIVIL - PROPERTY SEIZURE                          "
##  [57] "CIVIL - SERVE PAPERS                              "
##  [58] "CIVIL - STANDBY                                   "
##  [59] "TRANSPORT                                         "
##  [60] "WARRANT                                           "
##  [61] "WARRANT - WALK-IN / COUNTER                       "
##  [62] "WELFARE CHECK - COLD                              "
##  [63] "PERSON CONTACT (86)                               "
##  [64] "WELFARE CHECK - PRIORITY                          "
##  [65] "ASSAULT - COLD                                    "
##  [66] "BOMB - DEVICE DISCOVERED (33B) *H                 "
##  [67] "ROBBERY - COLD                                    "
##  [68] "SHOOTING - COLD                                   "
##  [69] "STABBING - COLD                                   "
##  [70] "BURGLARY - COLD                                   "
##  [71] "FRAUD - COLD                                      "
##  [72] "FRAUD - PRIORITY                                  "
##  [73] "THEFT - COLD                                      "
##  [74] "IDENTITY THEFT                                    "
##  [75] "THEFT - SUBJECT IN CUSTODY                        "
##  [76] "THEFT - PRIORITY                                  "
##  [77] "VANDALISM - COLD                                  "
##  [78] "VANDALISM - PRIORITY                              "
##  [79] "VEHICLE STOLEN - COLD                             "
##  [80] "ACCIDENT - HIT AND RUN - COLD                     "
##  [81] "ACCIDENT - HIT & RUN - PRIORITY                   "
##  [82] "ACCIDENT - INJURY                                 "
##  [83] "ACCIDENT - NON INJURY                             "
##  [84] "ACCIDENT - UNKNOWN INJURY                         "
##  [85] "DRIVING UNDER INFLUENCE                           "
##  [86] "HAZARD - HAZARDOUS CONDITION                      "
##  [87] "TRAFFIC STOP                                      "
##  [88] "HAZARD - WRONG-WAY DRIVER *H                      "
##  [89] "FOOT PURSUIT *H                                   "
##  [90] "TRAFFIC PURSUIT *H                                "
##  [91] "VEHICLE RECOVERED                                 "
##  [92] "VEHICLE STOLEN - PRIORITY                         "
##  [93] "PROWLER                                           "
##  [94] "BURGLARY - PRIORITY *H                            "
##  [95] "SCHOOL INCIDENT - WITH WEAPON *H                  "
##  [96] "ROLLING STOLEN *H                                 "
##  [97] "ASSISTANCE - RESPONDER EMERGENCY *H               "
##  [98] "GREAT - SRO INITIATED ACTIVITY                    "
##  [99] "SCHOOL EVENTS                                     "
## [100] "HOSTAGE SITUATION *H                              "
##   [1]   4900  10600   3502   2401   3200    702   8302   9201   9701   9804
##  [11]   4101   3501   2203    200   2902   8902    601   8100  10200   4002
##  [21]   5800   3401   3302   1801    501    602   8202   9502   3803   3801
##  [31]   5100   3301   1101   3601    301    701   7700   9000   9102   9301
##  [41]   6404   4001   3901   3902   1102   1302   7900   8400   9101   6502
##  [51]   3802   5900    802   2303   2100    402   7800   7300   8002    801
##  [61]   2903   9202   6403   3702   1400   2701   1602   9302   9501   9702
##  [71]   5000   7202    901   2000    302   6100   4800   8201   1201   1702
##  [81]   4500   7500   6200   5700   2502   3602   5200    100   2501   8800
##  [91]   4102   3701   2901   8301   1701   4200   2402   7400   7600   9400
## [101]   4700   6602   5600   3402   3100   2702   8600   8500   4300   6702
## [111]    401   5500    502   7201   2801   1301   8901   6701   1601    902
## [121]   8001   9803   1000   7000   1900   2802   8700   6001   2600   4602
## [131]   3000   1202   4601  30502   9903   6300   6501   1500   1802   6801
## [141]   6402  30600  20900   3603  22206   6802 980000   6002   6601   6900
## [151]  30102  30402  20800  22208  21802  20100

Including Plots

Category’s percentage:

Call description’s percentage:

Final case type’s percentage:

Census tract’s percentage:

District’s percentage:

Date’s percentage:

Multivariate Analysis

District Density Analysis:

## NULL

District histogram’s density

District vs Category:

District vs Final Case Type:

Date histogram’s density

Load data:

Date vs Category:

Date vs District: